Mexico analysis¶

Load libraries¶

import warnings
from functools import partial

import covid_analysis.utils.paths as path
import janitor
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_flavor as pf
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import prince
import seaborn as sns
from plotly.offline import init_notebook_mode

Set defaults for plots¶

# matplotlib
# The seaborn style sheets were renamed to "seaborn-v0_8-*" in matplotlib 3.6
# and the old names were later removed, so use whichever name is installed.
plt.style.use(
    "seaborn-whitegrid"
    if "seaborn-whitegrid" in plt.style.available
    else "seaborn-v0_8-whitegrid"
)
plt.rcParams["figure.figsize"] = (10, 8)

# seaborn
sns.set_style("whitegrid")

# plotly
init_notebook_mode()
pio.templates.default = "plotly_white"
pd.options.plotting.backend = "plotly"

# Silence every warning from here on (this is global, not limited to plots).
warnings.filterwarnings("ignore")

Load data¶

covid_mex_file = path.data_processed_dir("positive_covid_mex.csv")

# `date_parser` is deprecated since pandas 2.0, so read the raw values and
# convert the date columns afterwards. errors="coerce" turns anything that
# cannot be parsed into NaT.
covid_mex_df = pd.read_csv(filepath_or_buffer=covid_mex_file)
for date_column in ("date_admission", "date_symptoms", "date_death"):
    covid_mex_df[date_column] = pd.to_datetime(
        covid_mex_df[date_column], errors="coerce"
    )

covid_mex_df.head(1)
origin sector state sex patient_type date_admission date_symptoms date_death intubated pneumonia ... epoc asthma immunosuppressed hypertension other_comorbidity cardiovascular obesity chronic_kidney smoking icu
0 Out of USMER SSA CIUDAD DE MÉXICO Man Ambulatory 2020-10-16 2020-10-16 NaT Does not apply No ... No No No No No No No No No Does not apply

1 rows × 23 columns

Calculate new data¶

Death column indicator¶

# Outcome label per patient: a missing death date means "Survived",
# any recorded death date means "Death".
covid_mex_df["survived"] = np.where(
    covid_mex_df["date_death"].isna(), "Survived", "Death"
)

Binned ages¶

# Bin edges every 4 years: 0, 4, ..., 124.
bins = list(range(0, 128, 4))
# One "<start>-<end>" label per pair of consecutive edges.
labels = [f"{low}-{high}" for low, high in zip(bins, bins[1:])]
covid_mex_df["age_range"] = pd.cut(
    covid_mex_df["age"],
    bins=bins,
    labels=labels,
    include_lowest=True,
)
# Descending order on the outcome label puts "Survived" rows before "Death".
covid_mex_df = covid_mex_df.sort_values(by="survived", ascending=False)

Covid involvement by age and sex¶

# Sunburst of case counts following the hierarchy outcome -> sex.
(
    covid_mex_df
    .groupby(["survived", "sex"])
    .size()
    .reset_index(name="n")
    # The counted frame is handed to px.sunburst as its data_frame argument.
    .pipe(
        px.sunburst,
        path=["survived", "sex"],
        values="n",
        labels={"sex": "Sex", "n": "Count"},
        hover_name="survived",
        hover_data={"survived": False},
    )
)
# Confirmed cases per age range, coloured by outcome, one facet row per sex.
(
    covid_mex_df
    .groupby(["sex", "age_range", "survived"])
    .size()
    .reset_index(name="count")
    # The counted frame is handed to px.bar as its data_frame argument.
    .pipe(
        px.bar,
        x="age_range",
        y="count",
        color="survived",
        facet_row="sex",
        labels={
            "age_range": "Age",
            "count": "Confirmed cases",
            "survived": "Status",
        },
        category_orders={"survived": ["Survived", "Death"]},
        log_y=True,
    )
    # Keep only the part after "=" in each facet annotation.
    .for_each_annotation(
        lambda note: note.update(text=note.text.split("=")[1])
    )
    .update_xaxes(tickangle=315)
    .update_traces(hovertemplate='Count: %{y} <br> Age: %{x}')
    .update_layout(
        legend={
            "orientation": "h",
            "yanchor": "bottom",
            "y": 1.02,
            "xanchor": "right",
            "x": 1,
        },
        title_x=0.5,
    )
)

Time from infection to death¶

# Distribution of days between symptom onset and death, split by sex.
(
    covid_mex_df
    .filter_on("survived == 'Death'")
    .assign(
        life_time=lambda cases: (
            cases["date_death"] - cases["date_symptoms"]
        ).dt.days
    )
    # Keep strictly positive durations shorter than 100 days.
    .filter_on("life_time > 0 and life_time < 100")
    .pipe(
        px.histogram,
        x="life_time",
        color="sex",
        marginal="box",
        labels={
            "life_time": "Number of days",
            "count": "Count",
            "sex": "Sex",
        },
    )
    .update_traces(hovertemplate='Count: %{y} <br> Time before dying: %{x}')
    .update_layout(
        legend={
            "orientation": "h",
            "yanchor": "bottom",
            "y": 1.02,
            "xanchor": "right",
            "x": 1,
        }
    )
)
# Deaths per age range, one facet row per ICU answer, coloured by the mean
# number of days between symptom onset and death.
(
    covid_mex_df
    .filter_on("survived == 'Death'")
    .assign(
        life_time=lambda cases: (
            cases["date_death"] - cases["date_symptoms"]
        ).dt.days
    )
    .filter_on("life_time > 0")
    # Drop rows whose ICU value is anything other than an explicit Yes/No.
    .filter_on("icu in ['Yes', 'No']")
    .select_columns(["age_range", "icu", "life_time"])
    .groupby(["age_range", "icu"])
    ["life_time"]
    .describe()
    [["count", "mean"]]
    .reset_index()
    .pipe(
        px.bar,
        x="age_range",
        y="count",
        color="mean",
        facet_row="icu",
        labels={
            "age_range": "Age range",
            "count": "Count",
            "mean": "Mean lifetime",
            "icu": "ICU",
        },
        barmode="group",
    )
    # Give each facet row its own y scale and show its tick labels.
    .update_yaxes(matches=None, showticklabels=True)
    .update_xaxes(tickangle=315)
    .update_layout(
        legend={
            "orientation": "h",
            "yanchor": "bottom",
            "y": 1.02,
            "xanchor": "right",
            "x": 1,
        }
    )
)

Number of deaths per state and where they happened¶

# Treemap of deaths broken down by state -> origin -> sector -> patient type.
(
    covid_mex_df
    # Count deceased patients only: the chart is labelled "Number of deaths",
    # and without this filter every positive case would be counted.
    .filter_on("survived == 'Death'")
    .groupby(["state", "origin", "sector", "patient_type"])
    .size()
    .reset_index(name="n")
    .pipe(
        lambda df: (
            px.treemap(
                data_frame=df,
                # px.Constant adds a single "All" root above the states.
                path=[px.Constant("All"), "state", "origin", "sector", "patient_type"],
                values="n",
                labels=dict(
                    n="Number of deaths"
                )
            )
        )
    )
)

Patient conditions for survival¶

# Categorical columns selected for the MCA below. Despite the name, not every
# column is a plain Yes/No flag (e.g. "sex" and "survived").
yes_or_no = [
    "sex",
    "intubated",
    "pneumonia",
    "pregnancy",
    "diabetes",
    "epoc",
    "asthma",
    "immunosuppressed",
    "hypertension",
    "other_comorbidity",
    "cardiovascular",
    "obesity",
    "chronic_kidney",
    "smoking",
    "icu",
    "survived",
]
def _split_dummy_label(label, column_names):
    """Split a one-hot label ``<column>_<value>`` into ``(column, value)``."""
    # Try the longest known column name first so that names which themselves
    # contain "_" (e.g. "chronic_kidney") are not cut at their first underscore.
    for name in sorted(column_names, key=len, reverse=True):
        prefix = "{}_".format(name)
        if label.startswith(prefix):
            return name, label[len(prefix):]
    # Unknown prefix: fall back to splitting at the first underscore.
    head, _, tail = label.partition("_")
    return head, tail


def iplot_coordinates(
    mca,
    X,
    x_component=1,
    y_component=2,
    show_row_points=False,
    show_column_points=True,
    highlight = None,
    text_size=10
):
    """Scatter two MCA components as an interactive Plotly figure.

    Args:
        mca: Fitted MCA object exposing ``row_coordinates``,
            ``column_coordinates`` and ``explained_inertia_``.
        X: Data frame the coordinates are computed from.
        x_component: 1-based number of the component drawn on the x axis.
        y_component: 1-based number of the component drawn on the y axis.
        show_row_points: If true, plot one point per row of ``X``; otherwise
            plot one point per category (column coordinates).
        show_column_points: Currently unused; kept so existing calls keep
            working. Column points are drawn whenever ``show_row_points`` is
            false.
        highlight: Optional column of ``X`` used to colour the row points.
        text_size: Font size applied to the whole figure.

    Returns:
        The Plotly figure, with reference lines drawn at x=0 and y=0.

    Raises:
        ValueError: If a component number is smaller than 1.
    """
    if x_component < 1 or y_component < 1:
        raise ValueError("x_component and y_component are 1-based and must be >= 1")

    # Component numbers are 1-based for the caller. The coordinate frames and
    # ``explained_inertia_`` are indexed from 0 (assumes prince's default
    # integer column labels), so the first component is at position 0.
    x, y = x_component - 1, y_component - 1
    color, text = None, None

    if show_row_points:
        # Copy so adding the highlight column never writes into a view.
        df = mca.row_coordinates(X)[[x, y]].copy()

        if highlight:
            df[highlight] = X[highlight]
            color = highlight
    else:
        coords = mca.column_coordinates(X)[[x, y]]
        # The index holds one label per category, formatted "<column>_<value>".
        full_labels = [str(label) for label in coords.index]
        column_names = [str(name) for name in X.columns]
        parts = [_split_dummy_label(label, column_names) for label in full_labels]

        df = coords.reset_index(drop=True).assign(
            full_text=full_labels,
            Category=[category for category, _ in parts],
            Values=[value for _, value in parts],
        )

        color = "Category"
        text = "Values"

    df = df.rename(columns={x: "x", y: "y"})

    fig = px.scatter(
        data_frame=df,
        x="x",
        y="y",
        color=color,
        text=text,
        labels={
            "x": "Component {} ({:.2f}% inertia)".format(
                x_component, mca.explained_inertia_[x] * 100
            ),
            "y": "Component {} ({:.2f}% inertia)".format(
                y_component, mca.explained_inertia_[y] * 100
            ),
        }
    )

    fig.update_traces(textposition="top right")
    fig.update_layout(
        font=dict(size=text_size),
        shapes=[
            # Vertical reference line at x = 0 spanning the full plot height.
            dict(
                type="line",
                yref="paper", y0=0, y1=1,
                xref="x", x0=0, x1=0
            ),
            # Horizontal reference line at y = 0 spanning the full plot width.
            dict(
                type="line",
                yref="y", y0=0, y1=0,
                xref="paper", x0=0, x1=1
            ),
        ]
    )
    return fig
# Keep only the categorical columns that take part in the MCA.
X = covid_mex_df.select_columns(yes_or_no)

# Build a 4-component MCA with a pinned random_state and fit it on X.
mca = prince.MCA(n_components=4, random_state=42).fit(X)
mca
MCA(n_components=4, random_state=42)
import functools

# Pre-bind the fitted model, its data and the component pair so each call only
# has to choose what to draw.
x_component, y_component = 1, 2
plot_mca = functools.partial(
    iplot_coordinates,
    mca=mca,
    X=X,
    x_component=x_component,
    y_component=y_component,
)
plot_mca(show_row_points=False)
# Parallel-categories view of selected conditions among deceased patients.
(
    covid_mex_df
    .filter_on("survived == 'Death'")
    # The filtered frame is handed to px.parallel_categories as its data_frame.
    .pipe(
        px.parallel_categories,
        dimensions=[
            "icu",
            "chronic_kidney",
            "intubated",
            "pneumonia",
            "epoc",
            "cardiovascular",
        ],
        labels={
            "icu": "ICU",
            "chronic_kidney": "Chronic kidney",
            "intubated": "Intubated",
            "pneumonia": "Pneumonia",
            "epoc": "EPOC",
            "cardiovascular": "Cardiovascular",
        },
    )
    .update_traces(hoveron="color", hoverinfo="count+probability")
)